Relación con muertes

Autor/a

Brian Norman Peña-Calero

Fecha de publicación

8 de noviembre de 2023

1 Cargar paquetes

Código
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Código
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
Código
library(lmerTest)

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
Código
options(scipen = 10)

2 Importar datos

Código
gbmt_fit_final <- readRDS("01_data/processed/gbmt_fit_final.rds")
deaths_by_year <- read_csv(file = "01_data/processed/deaths_by_year.csv")
Rows: 6945 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): ISO2
dbl (3): SALID1, YEAR, deaths

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

3 Formato de datos

Código
gbmt_log_3 <- gbmt_fit_final %>% 
  filter(scale_name == "logarithmic", ng == 3) %>% 
  slice(1)

gbmt_log_3 <- gbmt_log_3$gbmt_fit_total[[1]]


gbmt_log_4 <- gbmt_fit_final %>% 
  filter(scale_name == "logarithmic", ng == 4) %>% 
  slice(1)

gbmt_log_4 <- gbmt_log_4$gbmt_fit_total[[1]]


gbmt_log <- gbmt_log_3 %>% 
  bind_rows(gbmt_log_4,
            .id = "Clusters") %>% 
  mutate(Clusters = case_match(Clusters,
                               "1" ~ 3,
                               "2" ~ 4))

Unificar datos:

Código
gbmt_log_merged <- gbmt_log %>%
  select(Clusters, ISO2, SALID1, YEAR = year, population_imp_norm:group) %>% 
  inner_join(
    deaths_by_year
  ) %>% 
  mutate(deaths_per_100k = (deaths / population_imp) * 100000)
Joining with `by = join_by(ISO2, SALID1, YEAR)`

4 Gráfico inicial de promedio de muertes

Código
gbmt_log_merged %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    deaths_per_100k = mean(deaths_per_100k)
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, y = deaths_per_100k, color = as.factor(group))) +
  geom_line(aes(group = group)) +
  geom_point() +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.

Código
ggsave(
  filename = "02_output/plots/promedio_muertes_group_log.png",
  dpi = 300,
  bg = "white"
)

5 Modelo lineal mixto

5.1 Modelo 1 (sin offset)

5.1.1 Lineal

Código
model_3 <- lmer(
  deaths_per_100k ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 3)
)
summary(model_3)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: deaths_per_100k ~ YEAR * group + (1 | SALID1)
   Data: gbmt_log_merged %>% filter(Clusters == 3)

REML criterion at convergence: 51046.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-9.2614 -0.3590 -0.0412  0.3379 10.4598 

Random effects:
 Groups   Name        Variance Std.Dev.
 SALID1   (Intercept) 568422   753.9   
 Residual              14350   119.8   
Number of obs: 3986, groups:  SALID1, 253

Fixed effects:
               Estimate  Std. Error          df t value Pr(>|t|)    
(Intercept) -15204.6579   1535.9204   3755.7399  -9.899   <2e-16 ***
YEAR             7.9367      0.7637   3730.1916  10.392   <2e-16 ***
group2      -19921.9123   1889.3145   3755.3268 -10.545   <2e-16 ***
group3       -4769.6320   2522.7822   3755.5491  -1.891   0.0588 .  
YEAR:group2     10.0842      0.9395   3730.2863  10.734   <2e-16 ***
YEAR:group3      2.4013      1.2545   3730.1344   1.914   0.0557 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) YEAR   group2 group3 YEAR:2
YEAR        -0.998                            
group2      -0.813  0.812                     
group3      -0.609  0.608  0.495              
YEAR:group2  0.812 -0.813 -0.998 -0.494       
YEAR:group3  0.608 -0.609 -0.494 -0.998  0.495
Código
model_4 <- lmer(
  deaths_per_100k ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 4)
)
summary(model_4)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: deaths_per_100k ~ YEAR * group + (1 | SALID1)
   Data: gbmt_log_merged %>% filter(Clusters == 4)

REML criterion at convergence: 51011.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-9.3443 -0.3480 -0.0255  0.3289 10.5276 

Random effects:
 Groups   Name        Variance Std.Dev.
 SALID1   (Intercept) 573344   757.2   
 Residual              14274   119.5   
Number of obs: 3986, groups:  SALID1, 253

Fixed effects:
               Estimate  Std. Error          df t value   Pr(>|t|)    
(Intercept) -29967.8679   1235.3518   3754.6485 -24.259    < 2e-16 ***
YEAR            15.4246      0.6143   3730.1321  25.110    < 2e-16 ***
group2       -9544.6955   2079.3163   3754.0162  -4.590 0.00000457 ***
group3       16889.0430   1990.6817   3755.0629   8.484    < 2e-16 ***
group4       11795.1602   3654.4782   3755.8523   3.228   0.001259 ** 
YEAR:group2      4.7775      1.0340   3729.4454   4.621 0.00000396 ***
YEAR:group3     -8.5523      0.9898   3729.8408  -8.640    < 2e-16 ***
YEAR:group4     -6.0243      1.8172   3728.9163  -3.315   0.000925 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) YEAR   group2 group3 group4 YEAR:2 YEAR:3
YEAR        -0.998                                          
group2      -0.594  0.593                                   
group3      -0.621  0.620  0.369                            
group4      -0.338  0.337  0.201  0.210                     
YEAR:group2  0.593 -0.594 -0.998 -0.368 -0.200              
YEAR:group3  0.620 -0.621 -0.368 -0.998 -0.209  0.369       
YEAR:group4  0.337 -0.338 -0.200 -0.209 -0.998  0.201  0.210

5.1.2 Poisson

Código
model_3_poisson <- glmer(
  deaths ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
summary(model_3_poisson)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: poisson  ( log )
Formula: deaths ~ YEAR * group + (1 | SALID1)
   Data: gbmt_log_merged %>% filter(Clusters == 3)

     AIC      BIC   logLik deviance df.resid 
 93071.6  93115.6 -46528.8  93057.6     3979 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-27.244  -1.707  -0.105   1.581  34.411 

Random effects:
 Groups Name        Variance Std.Dev.
 SALID1 (Intercept) 0.9431   0.9711  
Number of obs: 3986, groups:  SALID1, 253

Fixed effects:
               Estimate  Std. Error z value Pr(>|z|)    
(Intercept) -28.1982479   0.2120517 -132.98   <2e-16 ***
YEAR          0.0179908   0.0000881  204.21   <2e-16 ***
group2       -3.4199338   0.2662270  -12.85   <2e-16 ***
group3      -15.0532898   0.5040324  -29.87   <2e-16 ***
YEAR:group2   0.0015343   0.0001120   13.70   <2e-16 ***
YEAR:group3   0.0071222   0.0002322   30.67   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) YEAR   group2 group3 YEAR:2
YEAR        -0.834                            
group2      -0.797  0.665                     
group3      -0.420  0.351  0.335              
YEAR:group2  0.656 -0.787 -0.845 -0.276       
YEAR:group3  0.316 -0.379 -0.252 -0.925  0.298
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
model_4_poisson <- glmer(
  deaths ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
summary(model_4_poisson)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: poisson  ( log )
Formula: deaths ~ YEAR * group + (1 | SALID1)
   Data: gbmt_log_merged %>% filter(Clusters == 4)

     AIC      BIC   logLik deviance df.resid 
 91102.0  91158.7 -45542.0  91084.0     3977 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-27.499  -1.706  -0.088   1.531  34.979 

Random effects:
 Groups Name        Variance Std.Dev.
 SALID1 (Intercept) 0.9275   0.9631  
Number of obs: 3986, groups:  SALID1, 253

Fixed effects:
                Estimate   Std. Error  z value Pr(>|z|)    
(Intercept) -28.74899723   0.17419126 -165.043   <2e-16 ***
YEAR          0.01814074   0.00007399  245.190   <2e-16 ***
group2      -17.82188573   0.37318440  -47.756   <2e-16 ***
group3        0.12327807   0.27597423    0.447    0.655    
group4      -10.49954899   0.77767851  -13.501   <2e-16 ***
YEAR:group2   0.00865560   0.00016947   51.075   <2e-16 ***
YEAR:group3   0.00006760   0.00011577    0.584    0.559    
YEAR:group4   0.00497635   0.00036088   13.790   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) YEAR   group2 group3 group4 YEAR:2 YEAR:3
YEAR        -0.853                                          
group2      -0.467  0.398                                   
group3      -0.631  0.538  0.295                            
group4      -0.224  0.191  0.105  0.141                     
YEAR:group2  0.372 -0.436 -0.912 -0.235 -0.083              
YEAR:group3  0.545 -0.639 -0.254 -0.842 -0.122  0.279       
YEAR:group4  0.175 -0.205 -0.082 -0.110 -0.932  0.089  0.131
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?

5.2 Modelo 2 - Con offset

5.2.1 Estimación

Estimación de los modelos con la información poblacional como offset

Código
model_3_poisson_off <- glmer(
  deaths ~ YEAR * group + (1|SALID1)  + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.00333122 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
summary(model_3_poisson_off)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: poisson  ( log )
Formula: deaths ~ YEAR * group + (1 | SALID1) + offset(log(population_imp))
   Data: gbmt_log_merged %>% filter(Clusters == 3)

     AIC      BIC   logLik deviance df.resid 
125757.1 125801.2 -62871.6 125743.1     3979 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-27.043  -1.978  -0.110   1.933  35.567 

Random effects:
 Groups Name        Variance Std.Dev.
 SALID1 (Intercept) 0.2631   0.5129  
Number of obs: 3986, groups:  SALID1, 253

Fixed effects:
                Estimate   Std. Error z value Pr(>|z|)    
(Intercept)   5.65891920   0.18770436   30.15   <2e-16 ***
YEAR         -0.00536626   0.00008828  -60.79   <2e-16 ***
group2      -43.12373070   0.23740987 -181.64   <2e-16 ***
group3      -36.89556012   0.47936169  -76.97   <2e-16 ***
YEAR:group2   0.02167489   0.00011215  193.27   <2e-16 ***
YEAR:group3   0.01844462   0.00023332   79.05   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) YEAR   group2 group3 YEAR:2
YEAR        -0.944                            
group2      -0.791  0.747                     
group3      -0.393  0.371  0.310              
YEAR:group2  0.744 -0.787 -0.949 -0.292       
YEAR:group3  0.358 -0.379 -0.283 -0.977  0.299
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model failed to converge with max|grad| = 0.00333122 (tol = 0.002, component 1)
Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
model_4_poisson_off <- glmer(
  deaths ~ YEAR * group + (1|SALID1) + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
summary(model_4_poisson_off)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: poisson  ( log )
Formula: deaths ~ YEAR * group + (1 | SALID1) + offset(log(population_imp))
   Data: gbmt_log_merged %>% filter(Clusters == 4)

     AIC      BIC   logLik deviance df.resid 
119296.0 119352.6 -59639.0 119278.0     3977 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-25.965  -1.993  -0.072   1.944  36.208 

Random effects:
 Groups Name        Variance Std.Dev.
 SALID1 (Intercept) 0.2627   0.5126  
Number of obs: 3986, groups:  SALID1, 253

Fixed effects:
                Estimate   Std. Error  z value Pr(>|z|)    
(Intercept) -33.91616080   0.15652727 -216.679   <2e-16 ***
YEAR          0.01453835   0.00007411  196.170   <2e-16 ***
group2      -19.42859056   0.35019735  -55.479   <2e-16 ***
group3       41.17822747   0.24616549  167.279   <2e-16 ***
group4        7.19853240   0.74096565    9.715   <2e-16 ***
YEAR:group2   0.00963692   0.00016957   56.832   <2e-16 ***
YEAR:group3  -0.02071338   0.00011605 -178.481   <2e-16 ***
YEAR:group4  -0.00374071   0.00036137  -10.352   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) YEAR   group2 group3 group4 YEAR:2 YEAR:3
YEAR        -0.951                                          
group2      -0.448  0.426                                   
group3      -0.636  0.605  0.285                            
group4      -0.211  0.201  0.095  0.135                     
YEAR:group2  0.416 -0.438 -0.972 -0.265 -0.088              
YEAR:group3  0.608 -0.639 -0.272 -0.947 -0.129  0.280       
YEAR:group4  0.195 -0.205 -0.087 -0.124 -0.979  0.090  0.131
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?

Ingreso de los valores predictivos para los modelos de 3 y 4 clústers:

Código
gbmt_log_merged <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_poisson_off, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_poisson_off, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest()
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(data)`.
Código
gbmt_log_merged <- gbmt_log_merged %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

5.2.2 Gráfico

Debido a que se ha agregado a los interceptos de las ciudades como efecto aleatorio, entonces las estimaciones y predicciones van a variar de ciudad a ciudad. Para poder visualizarlo, se podría resumir los datos con su media.

Código
plot_poisson_offset <- gbmt_log_merged %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    across(
      c(predicted_deaths_100k, 
        deaths_per_100k),
      mean
    )
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line() +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.
Código
plot_poisson_offset

Código
ggsave(
  plot_poisson_offset,
  filename = "02_output/plots/promedio_muertes_group_poisson.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

5.3 Modelo 3 - Con offset sin efectos aleatorios

5.3.1 Estimación

Las estimaciones no consideran variaciones entre las ciudades

Código
model_3_poisson_off2 <- glm(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)

summary(model_3_poisson_off2)

Call:
glm(formula = deaths ~ YEAR * group + offset(log(population_imp)), 
    family = poisson(link = "log"), data = gbmt_log_merged %>% 
        filter(Clusters == 3))

Coefficients:
                Estimate   Std. Error z value Pr(>|z|)    
(Intercept)  20.91471063   0.17356342   120.5   <2e-16 ***
YEAR         -0.01298050   0.00008645  -150.2   <2e-16 ***
group2      -83.35453870   0.21854660  -381.4   <2e-16 ***
group3      -58.38416556   0.45938267  -127.1   <2e-16 ***
YEAR:group2   0.04187171   0.00010884   384.7   <2e-16 ***
YEAR:group3   0.02918818   0.00022878   127.6   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 7403371  on 3985  degrees of freedom
Residual deviance: 5079058  on 3980  degrees of freedom
AIC: 5116575

Number of Fisher Scoring iterations: 5
Código
model_4_poisson_off2 <- glm(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
summary(model_4_poisson_off2)

Call:
glm(formula = deaths ~ YEAR * group + offset(log(population_imp)), 
    family = poisson(link = "log"), data = gbmt_log_merged %>% 
        filter(Clusters == 4))

Coefficients:
                Estimate   Std. Error z value Pr(>|z|)    
(Intercept) -61.03060884   0.14169673 -430.71   <2e-16 ***
YEAR          0.02818326   0.00007055  399.46   <2e-16 ***
group2       -4.04165649   0.33191778  -12.18   <2e-16 ***
group3       83.26129484   0.22552208  369.19   <2e-16 ***
group4       33.79128295   0.71482476   47.27   <2e-16 ***
YEAR:group2   0.00195728   0.00016528   11.84   <2e-16 ***
YEAR:group3  -0.04182671   0.00011231 -372.41   <2e-16 ***
YEAR:group4  -0.01704939   0.00035601  -47.89   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 7403371  on 3985  degrees of freedom
Residual deviance: 5149021  on 3978  degrees of freedom
AIC: 5186542

Number of Fisher Scoring iterations: 5

Ingreso de las predicciones en el df:

Código
gbmt_log_merged2 <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_poisson_off2, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_poisson_off2, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest()
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(data)`.
Código
gbmt_log_merged2 <- gbmt_log_merged2 %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

5.3.2 Gráfico

Gráfico de los datos resumidos:

Código
gbmt_log_merged2 %>% 
  group_by(Clusters, group, YEAR) %>%
  summarise(
    across(
      c(predicted_deaths_100k,
        deaths_per_100k),
      mean
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.

Sin resumir:

Código
gbmt_log_merged2 %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()